/**
 * Project: complex.myapp
 * 
 * File Created at 2010-6-7
 * $Id$
 * 
 * Copyright 2008 Alibaba.com Corporation Limited.
 * All rights reserved.
 *
 * This software is the confidential and proprietary information of
 * Alibaba Company. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Alibaba.com.
 */
package com.complex.myapp;



import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.TestCase;

/**
 * Walk-through of the basic {@link Pattern} / {@link Matcher} API, written as
 * JUnit 3 test methods. Each test prints its intermediate results and also
 * asserts them, so a behavioral regression fails the test instead of only
 * changing the console output.
 */
public class MatcherBasic extends TestCase {

    /**
     * {@link Pattern#split(CharSequence)}: cuts the input around every match of
     * the pattern. Here {@code [,\s]+} matches one or more consecutive commas
     * and/or whitespace characters.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test1() throws Exception {
        Pattern pattern = Pattern.compile("[,\\s]+");
        String[] result = pattern.split("one two  three,four , five,six");
        for (String str : result) {
            System.out.println(str);
        }
        assertEquals(6, result.length);
        assertEquals("one", result[0]);
        assertEquals("six", result[5]);
    }

    /**
     * {@link Matcher} basics. A matcher is obtained from
     * {@link Pattern#matcher(CharSequence)}; the input must implement
     * {@link CharSequence}.
     * <ul>
     * <li>{@code matches()} is true only when the whole input matches.</li>
     * <li>{@code lookingAt()} is true when a match starts at the beginning of
     * the input, even if it does not cover all of it.</li>
     * </ul>
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test2() throws Exception {
        String str1 = "hello";
        Pattern pattern1 = Pattern.compile("hello");
        Matcher matcher1 = pattern1.matcher(str1);
        boolean fullMatch1 = matcher1.matches();
        System.out.println("matcher1.matches()=>" + fullMatch1);
        assertTrue(fullMatch1);

        String str2 = "hello world";
        Pattern pattern2 = Pattern.compile("hello");
        Matcher matcher2 = pattern2.matcher(str2);
        boolean fullMatch2 = matcher2.matches();
        System.out.println("matcher2.matches()=>" + fullMatch2);
        boolean prefixMatch2 = matcher2.lookingAt();
        System.out.println("matcher2.lookingAt()=>" + prefixMatch2);
        assertFalse(fullMatch2);
        assertTrue(prefixMatch2);
    }

    /**
     * {@code find()} scans the input for the next matching subsequence and
     * returns true when one exists. Combined with {@code appendReplacement} /
     * {@code appendTail} it performs an incremental search-and-replace.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test3() throws Exception {
        Pattern pattern = Pattern.compile("hello");
        Matcher matcher = pattern.matcher("hello world, hello world, hello_world");
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            // Non-terminal step: copies the text before the match, then the replacement.
            matcher.appendReplacement(sb, "haha");
            System.out.println("sb=>" + sb);
        }
        // Terminal step: copies whatever remains after the last match.
        matcher.appendTail(sb);
        System.out.println(sb.toString());
        assertEquals("haha world, haha world, haha_world", sb.toString());
    }

    /**
     * Extracts the IP address and the timestamp from access-log entries.
     * The dots of the IP address are escaped so that they match a literal
     * period rather than "any character". The timestamp is enclosed in square
     * brackets: the pattern matches the opening bracket, captures everything
     * that is not a closing bracket, then matches the closing bracket.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test4() throws Exception {
        String logEntry = "192.168.0.1 - - [26/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15\r\n"
            + "192.168.0.2 - - [25/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15";
        String regexp = "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})\\s-\\s-\\s\\[([^\\]]+)\\]";
        Pattern pattern = Pattern.compile(regexp);
        Matcher matcher = pattern.matcher(logEntry);
        int entries = 0;
        while (matcher.find()) {
            // Immutable snapshot of the current match state.
            MatchResult result = matcher.toMatchResult();
            System.out.println("IP=>" + result.group(1));
            System.out.println("Timestamp=>" + result.group(2));
            entries++;
        }
        assertEquals(2, entries);
    }

    /**
     * HTML handling: lists every attribute of a FONT tag. A first pattern
     * captures the attribute section of the tag, a second one iterates over
     * the {@code name="value"} pairs inside it.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test5() throws Exception {
        String html = "<font face=\"Arial Serif\" size=\"+2\" color=\"red\">";
        String regexForTag = "<\\s*font\\s+([^>]*)\\s*>";

        Pattern pattern = Pattern.compile(regexForTag, Pattern.CASE_INSENSITIVE);
        Matcher matcher = pattern.matcher(html);

        // group(1) throws IllegalStateException unless a match was found first.
        assertTrue("FONT tag not found", matcher.find());

        String attribute = matcher.group(1);
        System.out.println("属性字符串为：" + attribute);

        String regexForAttribute = "([a-z]+)\\s*=\\s*\"([^\"]+)\"";
        Pattern pattern2 = Pattern.compile(regexForAttribute, Pattern.CASE_INSENSITIVE);
        Matcher matcher2 = pattern2.matcher(attribute);

        int attributes = 0;
        while (matcher2.find()) {
            System.out.println(matcher2.group(1) + "=" + matcher2.group(2));
            attributes++;
        }
        assertEquals(3, attributes);
    }

    /**
     * HTML handling: rewrites the host of selected links in a page.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void test6() throws Exception {
        String url = "<a href=\"http://192.168.0.1:8080/test/index.jsp#test...\">"
            + "< a href = \"http://192.168.0.1:8080/test/index.jsp#?hahahaha...\">";
        // Dots are escaped so that only the literal host and file name match.
        String regex = "(<\\s*a\\s+href\\s*=\\s*\"http://192\\.168\\.0\\.1:8080/test/index\\.jsp[^\"]+\">)";
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(url);
        boolean find = matcher.find();
        System.out.println("find=>" + find);
        int links = 0;
        while (find) {
            String temp = matcher.group(1);
            System.out.println("替换前=>" + temp);
            temp = temp.replace("192.168.0.1", "localhost");
            System.out.println("替换后=>" + temp);
            assertTrue(temp.indexOf("http://localhost:8080/") >= 0);
            links++;
            find = matcher.find();
        }
        assertEquals(2, links);
    }

    /**
     * Common use 1 of 4 - query: {@code rs} is true when the input contains a
     * match of the pattern, false otherwise. To ignore case, compile with
     * {@code Pattern.compile(regEx, Pattern.CASE_INSENSITIVE)}.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void testQuery() throws Exception {
        String str = "abc efg ABC";
        String regEx = "a|f";
        Pattern pattern = Pattern.compile(regEx);
        Matcher matcher = pattern.matcher(str);
        boolean rs = matcher.find();
        System.out.println("rs=>" + rs);
        assertTrue(rs);
    }

    /**
     * Common use 2 of 4 - extraction: prints {@code name.txt}. Captured text
     * is available through {@code group(i)}, with {@code i} ranging up to
     * {@code groupCount()}.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void testGet() throws Exception {
        String regEx = ".+\\\\(.+)$";
        String str = "c:\\dir1\\dir2\\name.txt";
        Pattern pattern = Pattern.compile(regEx);
        Matcher matcher = pattern.matcher(str);
        // group(i) throws IllegalStateException unless a match was found first.
        assertTrue("path did not match", matcher.find());
        for (int i = 1; i <= matcher.groupCount(); i++) {
            System.out.println(matcher.group(i));
        }
        assertEquals("name.txt", matcher.group(1));
    }

    /**
     * Common use 3 of 4 - splitting, once through {@link Pattern#split} and
     * once through {@link String#split(String)}.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void testSplit() throws Exception {
        String regex = "::";
        Pattern pattern = Pattern.compile(regex);
        String[] result = pattern.split("aa::bb::cc");
        for (String str : result) {
            System.out.println("result=>" + str);
        }
        assertEquals(3, result.length);

        System.out.println("---------");
        String[] normal = "aa::bb::cc".split(regex);
        for (String str : normal) {
            System.out.println("nornal=>" + str);
        }
        assertEquals(3, normal.length);
        for (int i = 0; i < result.length; i++) {
            assertEquals(result[i], normal[i]);
        }
    }

    /**
     * Common use 4 of 4 - replacement (deletion): replacing every match with
     * the empty string deletes the matched text.
     *
     * @throws Exception declared for uniformity with the other tests
     */
    public void testReplaceOrDelete() throws Exception {
        String regex = "a+";
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher("aaabbced a ccdeaa");
        String first = matcher.replaceFirst("A");
        System.out.println("replaceFirst=>" + first);
        String result = matcher.replaceAll("A");
        System.out.println("replaceAll=>" + result);
        String delete = matcher.replaceAll("");
        System.out.println("替换为空即可达到删除的功能");
        System.out.println("delete=>" + delete);

        assertEquals("Abbced a ccdeaa", first);
        assertEquals("Abbced A ccdeA", result);
        assertEquals("bbced  ccde", delete);
    }

    /**
     * No fixture is needed; delegates to the superclass.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
    }

    /**
     * No fixture to release; delegates to the superclass.
     */
    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }

}
